/* Session-wide settings: they take effect as soon as this file is run or included. */
OPTIONS LINESIZE=80; /* limit log and procedure-output lines to 80 columns */
TITLE 'SBmacro'; /* title line printed on subsequent procedure output */

*********;
*SCORING;

/*
Non-trivial variables:
TRIAL_COND is the name of the variable, in the input data, that indicates the response
condition. The macro computes a mean for each of these conditions, and then a d-score, which is
the difference between the mean of this condition and the overall mean, divided by the overall
stddev. 
To remind you, IAT D is (MB2-MB1)/STDpool. So, if MB2 is 900, MB1 is 700, and STDpool is 200, then D = 1.
Now, in the present macro D2 = (MB2-M)/STDpool, (900-800)/200 = 0.5. D1 = (MB1-M)/STDpool, (700-800)/200 = -0.5.
And the preference D would be D2-D1 = 1. So, this is the same thing. The macro is going to produce a score for each 
condition, and if there are only two conditions (like in all the IAT variants), then the preference score would be 
the difference between the two.

ParcelVar = the parcel variable. The macro outputs a score for
each TRIAL_COND for each PARCEL. Use -1 if you are not interested in that.

trlExc = The macro does not use trials with TrlExc = 1.

Prefix = prefix for all the output variables.

OUTPUT (all are appended with the prefix variable):
prefix_Ncond , prefix_NCond1-prefix_NCond# = number of trials used for each condition (and with #, for each part).
prefix_R = overall average latency. prefix_R1-prefix_R# = mean latency for each part.
prefix_RTrialCond, prefix_RTrialCond1-# = the same but for each cond.
prefix_S = overall stddev. prefix_S1-prefix_S# = stddev for each part.
prefix_DTrialCond  = d-score for each TRIAL_COND.
prefix_DTrialCond1-prefix_DTrialCond# = d-score for each TRIAL_COND for each part.

usage:
1 Set the library [change the path to your path]:
libname libB 'C:\Documents and Settings\yoav\My Documents\school\BigFiles\dflt1';
2 Include the macro in your sas script:
%include 'C:\Documents and Settings\yoav\My Documents\school\work\stasExamples\scoreMac.sas';
3 Save your input data to the library:
data libB.inputDS; set myrawdata; run;
4 Call the macro
%score(libIn=libb, libOut=libb, indata=inputDS, outdata=outputDS, SUBJECT=session_id, TRIAL_COND=cond, 
TRIAL_LATENCY=trial_latency, parcelVar=part, trlExc=trlExc, prefix=scr, debug=0);
5 read the output data:
data outputDS; set libb.outputDS;	run;
6 look at the variable names:
proc contents short data=outputDS; run;
7 If you have only 2 within-conditions, say condA and condB, you will see two scores that have a correlation of 1 between them.
For instance, scr_dcondA and scr_dcondB. You can simply choose one of them as your score.
*/

%macro score(libIn, libOut, indata, outdata, SUBJECT, TRIAL_COND, TRIAL_LATENCY, parcelVar, trlExc, prefix, debug);
/*
Purpose: build one output row per subject holding latency means (prefix_R...), standard
deviations (prefix_S...), trial counts (prefix_N...) and d scores (prefix_D...), and write it
to libOut.outdata.

Parameters:
  libIn, indata   = library and dataset holding the trial-level input.
  libOut, outdata = library and dataset that receive the per-subject result.
  SUBJECT         = subject/session id variable. The final step keeps only rows with
                    SUBJECT > 0, so it is presumably numeric and positive - TODO confirm.
  TRIAL_COND      = condition variable; its values become suffixes of the output names.
  TRIAL_LATENCY   = latency variable that is averaged.
  parcelVar       = parcel variable; only its last character is used to build condPart.
  trlExc          = exclusion flag; trials with trlExc = 1 are deleted.
  prefix          = prefix for all output variable names.
  debug           = accepted but not referenced anywhere in this macro.

Side effects: creates or overwrites the WORK datasets inDS, means, parmeans, parstds,
spdmeans, dtrial, RAll, RPart, RCond, RCondPart, SAll, SPart, NCond, NCondPart, DCond and
DCondPart. The datasets means and spdmeans are reused by consecutive steps, so the order
of the steps below matters.
*/

data inDS; set &libIn..&inData;
	/*create an ID for each condition on each parcel. notice that here the maximum number of parcels is 9,
	because only the last character of parcelVar is appended to the condition (parcels that share
	a last character would get the same ID).
	But this could probably change. I'm not sure why I treat the parcelVar as a string here, and not as a number*/
	condPart = trim(&TRIAL_COND) || trim(substr(&parcelVar,length(&parcelVar),1));
	/*delete excluded trials*/
	if &trlExc = 1 then delete;
	keep &subject &parcelVar &trial_cond condPart &trial_latency;
run;

/**Latency**/
/*the names of the variables are going to be 
prefix_R, prefix_R# for parcels, prefix_RTrial_Cond, prefix_RTrial_Cond#*/
%let _r_ = _R;
%let var_r=&prefix&_r_;
/**compute overall mean for each subject*/
proc means data=inDS noprint; class &SUBJECT; var &TRIAL_LATENCY; output out=means  mean=R; run;
PROC SORT DATA=means; BY &SUBJECT; RUN;
/*There is no VAR statement, so every numeric variable in the proc means output is transposed
(the automatic _TYPE_ and _FREQ_ as well as the statistic). The where on name in the next step
keeps only the row of the statistic. The same pattern repeats throughout the macro.*/
proc transpose data=means name=name out=spdmeans prefix=R; by &SUBJECT; run;
data RAll; set spdmeans;where name = 'R'; &var_r=R1; keep &SUBJECT &var_r; run;
/*proc print data=RAll;run;*/
/**compute latency of each parcel for each subject*/
/*parmeans is kept under its own name because it is merged back onto the trials in the parcel-level d computation*/
proc means data=inDS noprint; class &SUBJECT &parcelVar; 
var &TRIAL_LATENCY; output out=parmeans  mean=lat; run;
/*transpose to a row for each subject*/
PROC SORT DATA=parmeans; BY &SUBJECT; RUN;
proc transpose data=parmeans name=name out=spdmeans prefix=&var_r; id &parcelVar; by &SUBJECT; run;
data RPart; set spdmeans;where name = 'lat'; drop name; RUN;
/**compute latency of each condition for each subject*/
proc means data=inDS noprint; class &SUBJECT &TRIAL_COND; 
var &TRIAL_LATENCY; output out=means  mean=lat; run;
/*transpose to a row for each subject*/
PROC SORT DATA=means; BY &SUBJECT; RUN;
proc transpose data=means name=name out=spdmeans prefix=&var_r; id &TRIAL_COND; by &SUBJECT; run;
data RCond; set spdmeans;where name = 'lat'; drop name; RUN;
/*compute latency of each condition for each part for each subject*/
proc means data=inDS noprint; class &SUBJECT condPart; 
var &TRIAL_LATENCY; output out=means  mean=lat; run;
/*transpose to a row for each subject*/
PROC SORT DATA=means; BY &SUBJECT; RUN;
proc transpose data=means name=name out=spdmeans prefix=&var_r; id condPart; by &SUBJECT; run;
data RCondPart; set spdmeans;where name = 'lat'; drop name; RUN;
/*proc print data=RCondPart;run;*/

/**Standard deviation is computed only per subject and per part, not per condition, because we don't need that to compute the d**/
%let _s_ = _S;
%let var_s=&prefix&_s_;
/**compute overall std for each subject*/
proc means data=inDS noprint; class &SUBJECT; var &TRIAL_LATENCY; output out=means  std=sd; run;
PROC SORT DATA=means; BY &SUBJECT; RUN;
proc transpose data=means name=name out=spdmeans prefix=S; by &SUBJECT; run;
data SAll; set spdmeans;where name = 'sd'; &var_s=S1; keep &SUBJECT &var_s; run;
/*proc print data=SAll;run;*/
/**compute STD of each part for each subject*/
/*parstds is kept under its own name because it is merged back onto the trials in the parcel-level d computation*/
proc means data=inDS noprint; class &SUBJECT &parcelVar; 
var &TRIAL_LATENCY; output out=parstds std=sd; run;
/*transpose to a row for each subject*/
PROC SORT DATA=parstds; BY &SUBJECT; RUN;
proc transpose data=parstds name=name out=spdmeans prefix=&var_s; id &parcelVar; by &SUBJECT; run;
data SPart; set spdmeans;where name = 'sd'; drop name; RUN;

/**N of used trials is computed only per condition per session, and per condition per part**/
%let _n_ = _N;
%let var_n=&prefix&_n_;
/**Per cond per session**/
proc means data=inDS noprint; class &SUBJECT &TRIAL_COND; var &TRIAL_LATENCY; output out=means  N=num; run;
PROC SORT DATA=means; BY &SUBJECT; RUN;
proc transpose data=means name=name out=spdmeans prefix=&var_n; id &TRIAL_COND; by &SUBJECT; run;
data NCond; set spdmeans;where name = 'num'; drop name; run;
/*proc print data=NCond;run;*/
/**Per cond per part**/
proc means data=inDS noprint; class &SUBJECT condPart; var &TRIAL_LATENCY; output out=means  N=num; run;
PROC SORT DATA=means; BY &SUBJECT; RUN;
proc transpose data=means name=name out=spdmeans prefix=&var_n; id condPart; by &SUBJECT; run;
data NCondPart; set spdmeans;where name = 'num'; drop name; run;
/*proc print data=NCondPart;run;*/

/***CALCULATE D Scores***/
%let _d_ = _D;
%let var_d=&prefix&_d_;
/**Per session**/
proc sort data=inDS; by &SUBJECT;run;
proc sort data=RAll; by &SUBJECT;run;
proc sort data=SAll; by &SUBJECT;run;
/*A d score for each trial*/
/*NOTE(review): as coded, trial_d = (subject mean - trial latency)/sd, which is the opposite sign
of the (trial latency - mean)/sd form used in the explanation below and in the header comment.
Confirm which direction is intended before relying on the sign of the d scores.*/
data dtrial; merge inDS RAll SAll; by &SUBJECT;trial_d = (&var_r-&TRIAL_LATENCY)/&var_s;
keep &Subject &TRIAL_COND trial_d; run;
/*Note: Computing a d for each trial saves us dealing with the overall mean and std latency later. 
Why is that ok to do so?  
In the old method: D = (meanCond1-meanCond2)/stdAll. 
This is the same as: d1 = (meanCond1-meanAll)/stdAll; d2 = (meanCond2-meanAll)/stdAll. D = d1-d2

[Why? d1-d2 = (meanCond1-meanAll)/stdAll - (meanCond2-meanAll)/stdAll = 
( (meanCond1-meanAll) - (meanCond2-meanAll) ) / stdAll = 
(meanCond1-meanAll - meanCond2 + meanAll) / stdAll = (meanCond1 - meanCond2) / stdAll]

In this macro I'm going to do this: mean((trialR - meanAll)/stdAll) per each cond, and this will be the d1 and d2... from above.
I argue that for each condition: mean((trialR - meanAll)/stdAll) = (meanCond-meanAll)/stdAll.
Indeed, meanAll (M) and stdAll (S) are the same for every trial of a subject, so with N trials in the condition:
Mean((Xn-M)/S) = Sum((Xn-M)/S)/N = ((Sum(Xn) - N*M)/S)/N = (Sum(Xn)/N - M)/S =
(mean(Xn) - M)/S = exactly what D-cond is: (meanCond-meanAll)/stdAll.
Or, if one needs an example, let's say we have three trials in that condition:
mean((Xn-M)/S)= sum((Xn-M)/S)/N =
((X1-M)/S + (X2-M)/S + (X3-M)/S)/3 = 
((X1-M + X2-M + X3-M)/S) / 3 = 
((X1+X2+X3 - 3M)/S) / 3 = 
(X1+X2+X3 - 3M)/3S = 
(X1+X2+X3)/3S - 3M/3S = 
Mean(X)/S - M/S = (Mean(X)-M)/S

*I know that anyone with some knowledge in arithmetics understood that right away, but i didn't.
*/

/*So now when we have a d score for trial, we can compute that mean of each condition:*/
proc means data=dtrial noprint; class &SUBJECT &TRIAL_COND; var trial_d; output out=means  mean=dlat; run;
PROC SORT DATA=means; BY &SUBJECT; RUN;
proc transpose data=means name=name out=spdmeans prefix=&var_d; id &TRIAL_COND; by &SUBJECT; run;
data DCond; set spdmeans;where name = 'dlat'; drop name; RUN;
/*proc print data=DCond;run;*/

/**Repeat the same logic for the session-parcel level**/
proc sort data=inDS; by &SUBJECT &ParcelVar;run;
/*These datasets are the output of proc means (before the transpose). 
The statistic is called lat in Parmeans and sd in Parstds, with one row per subject per parcel.*/
proc sort data=Parmeans; by &SUBJECT &ParcelVar;run;
proc sort data=Parstds; by &SUBJECT &ParcelVar;run;
/*proc print data=Parstds;run;*/

/*A d score for each trial, this time relative to the mean and std of the trial's own parcel*/
data dtrial; merge inDS Parmeans Parstds; by &SUBJECT &ParcelVar;trial_d = (lat-&TRIAL_LATENCY)/sd;
keep &Subject condpart trial_d; run;
/*proc print data=dtrial;run;*/
/*Now when we have a d score for trial, we can compute that mean of each condition:*/
proc means data=dtrial noprint; class &SUBJECT condPart; var trial_d; output out=means  mean=dlat; run;
PROC SORT DATA=means; BY &SUBJECT; RUN;
proc transpose data=means name=name out=spdmeans prefix=&var_d; id condPart; by &SUBJECT; run;
data DCondPart; set spdmeans;where name = 'dlat'; drop name; RUN;
/*proc print data=DCondPart;run;*/
*merge all;

proc sort data=RAll; by &SUBJECT; run;
proc sort data=RPart; by &SUBJECT; run;
proc sort data=RCond; by &SUBJECT; run;
proc sort data=RCondPart; by &SUBJECT; run;
proc sort data=SAll; by &SUBJECT; run;
proc sort data=SPart; by &SUBJECT; run;
proc sort data=NCond; by &SUBJECT; run;
proc sort data=NCondPart; by &SUBJECT; run;
proc sort data=DCond; by &SUBJECT; run;
proc sort data=DCondPart; by &SUBJECT; run;

/*One row per subject. The subsetting if drops rows whose subject is missing or not positive;
that includes the all-subjects summary rows that proc means outputs because no NWAY option is used.*/
data &libout..&outdata;	 merge RAll RPart RCond RCondPart SAll SPart NCond NCondPart DCond DCondPart; by &Subject; 
if &Subject > 0; drop _LABEL_; run;

%mend score;

%macro ampScore(libIn, libOut, indata, outdata, SUBJECT, TRIAL_COND, trlResp, parcelVar, prefix, debug);
/*
Purpose: for every subject, average trlResp within each condition and within each
condition-by-parcel cell, and write one row per subject to libOut.outdata.
Output variables are named prefix_t followed by the condition value (or the condPart value).
The name column produced by proc transpose is left in the output.

Parameters:
  libIn, indata   = library and dataset holding the trial-level input.
  libOut, outdata = library and dataset that receive the result.
  SUBJECT         = subject/session id variable; only rows with SUBJECT > 0 are kept.
  TRIAL_COND      = condition variable.
  trlResp         = numeric response variable that is averaged.
  parcelVar       = parcel variable; only its last character is used, so at most 9 parcels.
  prefix          = prefix for the output variable names.
  debug           = accepted but not referenced in this macro.

Side effects: overwrites the WORK datasets inDS, means, spdmeans, scoreAll and parcelScores.
*/

/*suffix of the output variables: t is for rate*/
%let _t_ = _t;
%let var_t=&prefix&_t_;

/*keep only the needed columns and tag every trial with a condition-by-parcel ID*/
data inDS;
	set &libIn..&inData;
	condPart = trim(&TRIAL_COND) || trim(substr(&parcelVar,length(&parcelVar),1));
	keep &subject &parcelVar &trial_cond condPart &trlResp;
run;

/**rate of pleasant responses per condition**/
proc means data=inDS noprint;
	class &SUBJECT &TRIAL_COND;
	var &trlResp;
	output out=means mean=plsntRate;
run;
proc sort data=means;
	by &SUBJECT &TRIAL_COND;
run;
/*one row per subject, one column per condition*/
proc transpose data=means name=name out=spdmeans prefix=&var_t;
	by &SUBJECT;
	id &TRIAL_COND;
run;
/*the transpose also yields rows for _TYPE_ and _FREQ_; keep the row of the statistic only*/
data scoreAll;
	set spdmeans(where=(name = 'plsntRate'));
run;

/**rate of pleasant responses per parcel per condition**/
proc means data=inDS noprint;
	class &SUBJECT condPart;
	var &trlResp;
	output out=means mean=plsntRate;
run;
proc sort data=means;
	by &SUBJECT condPart;
run;
proc transpose data=means name=name out=spdmeans prefix=&var_t;
	by &SUBJECT;
	id condPart;
run;
data parcelScores;
	set spdmeans(where=(name = 'plsntRate'));
run;

/**combine the two levels into one row per subject**/
proc sort data=scoreAll;
	by &SUBJECT;
run;
proc sort data=parcelScores;
	by &SUBJECT;
run;
data &libout..&outdata;
	merge scoreAll parcelScores;
	by &Subject;
	if &Subject > 0;
	drop _LABEL_;
run;

%mend ampScore;

%macro spdScore(libIn, libOut, libTmp, indata, outdata, SUBJECT, TRIAL_COND, trlResp, trlEx, parcelVar, prefix, debug);
/*
Speeded is scored pretty much the same as the AMP, only with a 1-4 scale instead of 0 to 1.
We only need to exclude trials (AMP does not have excluded trials) and convert the trial
response to an actual number; the scoring itself is delegated to the ampScore macro.

Parameters:
  libIn, indata   = library and dataset holding the trial-level input.
  libOut, outdata = library and dataset that receive the result.
  libTmp          = library in which the intermediate dataset tmp is created (overwritten).
  SUBJECT         = subject/session id variable.
  TRIAL_COND      = condition variable.
  trlResp         = response variable; converted to the numeric variable response.
  trlEx           = exclusion flag; only trials with trlEx = 0 are kept.
  parcelVar       = parcel variable.
  prefix          = prefix for the output variable names.
  debug           = passed through to ampScore.
*/
data &libTmp..tmp;
	set &libIn..&inData;
	/*read the response with the 4. informat to get a numeric value*/
	response = input(&trlResp,4.0);
	/*keep only the trials that are not excluded*/
	if &trlEx=0;
run;
/*Score the filtered copy. ampScore must read from the same library the copy was written to,
so pass libTmp rather than a fixed libref.*/
%ampScore(libIn=&libTmp, libOut=&libOut, indata=tmp, outdata=&outdata, SUBJECT=&SUBJECT, TRIAL_COND=&TRIAL_COND, 
trlResp=response, parcelVar=&parcelVar, prefix=&prefix, debug=&debug);
%mend spdScore;

